
* Reset the session and turn off output paging so the do-file runs unattended.
clear all
set more off

/* KP:  CHANGES MADE TO THIS FILE FROM ORIGINAL ** 
- Added global basepaths to be able to run this file independently
- Saved new versions of all files generated from this do file with _parent addition 
- Line 842:  changed to keep parent vars from original OLD file
- Line 1853:  changed to keep parent vars as well 

* NEW KP 7/14/20:  This file imports ONLY the already processed data files from previous versions for which parent data were available.

*/

* Root folders. Every other folder global below is built from these two,
* so they must be defined first.
global basepath "M:\Massachusetts"      
global basepath3 "M:\Massachusetts\data_setup"      

* Derived folders (macros are expanded at definition time).
global raw "$basepath\data_raw" 
global data_setup "$basepath3\data" 
global do_setup "$basepath3\Programs"       
global log "$basepath3/Log" 
global adobase  "M:\Massachusetts" 
global lotto "$basepath\data_lotteries"

* charter points at the same folder as lotto; save is the working folder
* where the per-school .dta files are written.
global charter "$lotto"
global save "$raw\saves\school by year files"
global dir "$lotto\1_id matching"


* Work from the save folder so that relative file names resolve there.
cd "$save"
 

*****switches*******
* Each switch is compared with the string "1" to decide whether a section runs.
* newdata gates the import/append section and bostondata gates the old master
* file section; match and cleanup are not referenced in the part of the file
* reviewed here (presumably used further down - confirm).
global newdata		"1" // new boston and state data
global bostondata   "1" // original boston data
global match		"1" // final fuzzy matching attempt
global cleanup		"1"	// final organization of data

if "${newdata}"=="1"{
* inputcharter, path(string) short(string)
*
* Reads one formatted lottery text file and saves a cleaned Stata file.
*   path  - full path of the delimited text file to read with insheet
*   short - short label for the file; used as the output file name
*           (short.dta and short_raw.dta in the save folder) and as the
*           name of an indicator variable set to 1 on every row
*
* Steps: read the file, drop rows with an empty school, save the raw copy,
* drop columns that are not needed, convert the remaining optional columns to
* a common storage type, build disqualified / sibling / siblingapplying,
* recode the yes/no style flags to 0/1, rename the offer, year and grade
* columns, and save.
*
* Most statements are prefixed with capture because the set of columns differs
* from file to file; a captured statement that refers to an absent column is
* silently skipped.
cap prog drop inputcharter
prog define inputcharter 
	syntax , path(string) short(string)
	* First row of the text file holds the variable names; numbers are stored as double.
	insheet using "`path'", names clear double
	di "Insheeted `short'"
    cd "$save"
	drop if school==""
	* Keep an untouched copy of the imported rows before any column is dropped.
	save `short'_raw.dta, replace
	cap drop if obs==. /* get rid of the sumstats row and blank observations */
	cap drop  obs 
	cap drop  gender 
	cap drop raceethnicity 
	*cap drop caplast 
	*cap drop capfirst
	cap drop lastname 
	cap drop firstname 
	*cap drop middlename 
	*KP added 7/14/20
		* NOTE(review): not captured, so a file without a middlename column stops the program here.
		qui rename middlename mname
	*cap drop birthdate 
	cap drop offerdate 
	cap drop statasasid 
	cap drop townofresidence
	cap drop siblingfirstname 
	cap drop siblinglast  
	cap drop testyear
	cap drop dateaddedtowaitlist
	cap drop statasasid2 
	*EMS added 8-15-2013
		cap drop fuzzysasid 
		* Use the combined id as sasid when the file has one.
		cap g double sasid=combinedsasid
		cap drop combinedsasid	
		cap drop combo_sasid
	cap drop siblingmiddle 
	cap drop notes
	cap drop schoolid 
	cap drop enrolllossofseatdate
	cap drop lotterynumber_state 
	cap drop waitlistnumber_state	
	*cap drop stschool ndschool rdschool
	* A single drop with several names fails as a whole if any one of them is absent.
	cap drop  initialofferroxp initialoffergh initialofferdp everofferroxp everoffergh everofferdp	
	*CW:  Fix messed up case in CoaH '09
	cap replace sasid="" if sasid=="*1096532213&1077005024"
	cap replace sasid="" if sasid=="x"
	cap destring sasid, replace
	* Disqualified flag: 1 unless the source column is 0 or empty. The two captured
	* replaces cover a string column and a numeric column respectively.
	gen dis=1
	cap replace dis=0 if disqualified=="0"|disqualified==""
	cap replace dis=0 if disqualified==0|disqualified==.
	drop disqualified
	ren dis disqualified
	* Bring optional columns to string so the files share storage types.
	cap tostring  siblingstatus, replace
	cap tostring  siblingstatustype, replace
	cap tostring  outofarea, replace
	tostring lateapplicant, replace
	cap tostring  disqualified, replace
	cap tostring  offeredadmissioninitially, replace
	cap tostring offeredadmissionever, replace
	cap tostring lotterynum, replace
	cap tostring waitlistnum, replace
	cap tostring date, replace
	cap tostring cantmatch, replace
	cap tostring stschool, replace
	cap tostring ndschool, replace
	cap tostring rdschool, replace
	cap tostring dob, replace
	cap replace dob="" if dob=="."
	cap tostring updatedinitialofferroxp, replace 
	cap tostring updatedinitialoffergh, replace
	cap tostring updatedinitialofferdp, replace
	cap tostring updatedeverofferroxp, replace
	cap tostring updatedeveroffergh, replace
	cap tostring updatedeverofferdp, replace
	cap tostring prioritygroupnumber, replace
	cap tostring offerredadmission2ndlotteryever, replace
	cap tostring secondlottery, replace
	cap drop v35 
	cap drop v36 
	cap drop v37 
	cap drop v38
	cap drop v41
	cap tostring dateoflottery, replace
	cap destring basedonschool, replace
	*KP added 7/14/20
	cap tostring parent1firstname, replace
	cap tostring parent2firstname, replace
	cap tostring parent1lastname, replace
	cap tostring parent2lastname, replace
	cap drop dateoffered
	foreach vardrop in lotterynumber_state offeroffwaitlist dateaccepted septemberlottery septembereveroffer campusassignment statsasid comments waitlistfile enrolled declined combined k2 k2lottery k2decline k2ineligible sheet13 k1 k1lottery k1decline k1ineligible status reconciliation cantmatch siblnglastname incodmanapps orderofdrawing admissionsstatus orange updates statamismatched {
		cap drop `vardrop'
	}	
	* NOTE(review): drop does not accept a variable name together with an if
	* condition, so this captured statement never changes the data. Confirm what
	* was intended for the COAH II 2013 file.
	cap drop lottery if school=="COAHII" // COAH II 2013
	cap drop v39 
	cap drop v40
	cap drop v9
	cap drop v43
	cap drop v29
	cap drop idnumber
	cap drop accept*
	cap drop wl1_5
	cap drop denied_removed
	cap drop other
	cap drop siblingnote
	cap drop offerpending
	cap drop rematched
	cap drop residency
	cap drop bc
	cap drop column*
	cap drop wlnumber
	cap drop lotteryres*
	cap drop appdate
	cap drop flag
	cap drop rematched
	cap drop abovebar
	cap drop wl
	cap drop assignedroxp assignedgh assigneddp hascampusassignment nocampusassignment
	* Some files misspell the initial-offer column name.
	cap ren offeredadmissioninitianlly offeredadmissioninitially
	
	
	* NOTE(review): initial_offer and offer are only created by the renames near
	* the end of the program, so these captured replaces are skipped unless the
	* input file already has columns with those names. Confirm intended placement.
	foreach v in roxp gh dp {
		cap destring initialoffer`v', replace
		cap destring everoffer`v', replace
		cap replace initial_offer = initialoffer`v' if (initialoffer`v' !=. & initialoffer`v' != 0)
		cap replace offer = everoffer`v' if (everoffer`v' !=. & everoffer`v' != 0)
	}
	
	*sibs
	qui replace siblingstatus="1" if siblingstatus=="1"|siblingstatus=="x"|siblingstatus=="yes"|siblingstatus=="Yes"
	qui replace siblingstatus="0" if siblingstatus==""|siblingstatus=="no"|siblingstatus=="No"
	qui gen sibling=0
	qui replace sibling=1 if siblingstatustype=="enrolled"|siblingstatustype=="Enrolled"
	qui replace sibling=1 if siblingstatus=="1"
	qui replace sibling=1 if siblingstatustype==""&siblingstatus=="1"
	qui gen siblingapplying=0
	qui replace siblingapplying=1 if siblingstatustype=="applying"|siblingstatustype=="Applying" ///
		|siblingstatustype=="Applying at same time" |siblingstatustype=="Applying twins" ///
		|siblingstatustype=="Applying/Newly admitted" |siblingstatustype=="applied" |siblingstatustype=="Applied"
	* A sibling who is only applying does not count as an enrolled sibling.
	qui replace sibling=0 if siblingapplying==1
	* Captured as a whole: skipped when the file has no mismatched column.
	cap replace disqualified ="1" if disqualified=="4th grade (not eligible for 6th)"|disqualified=="Retained in 5th grade (not eligible for 6th)" ///
		|disqualified=="no other data" | disqualified=="Withdrew application prior to lottery" ///
		|disqualified=="Out of area"|mismatched=="1"
	cap drop mismatched	

	* Recode the yes/no style flags to numeric 0/1; empty and missing become 0.
	local vars outofarea lateapplicant offeredadmissioninitially offeredadmissionever disqualified
	foreach v of local vars {
		cap replace `v'="0" if `v'==""|`v'=="No"|`v'=="no" | `v'=="$charter\bridgeboston\formatted\Spring"
		cap replace `v'="1" if `v'=="x"|`v'=="X"|`v'=="Yes"|`v'=="yes"
		cap destring `v', replace
		cap replace `v'=0 if `v'==.
		}
		
	cap ren offeredadmissionever offer
	cap ren offeredadmissioninitially initial_offer
	* Fall back to a column named initialoffer where initial_offer is missing.
	* Captured because most files have no such column.
	cap replace initial_offer = initialoffer if initial_offer==.&initialoffer!=.
	qui drop siblingstatus siblingstatustype
	ren applicationyear year
	ren gradeapplying grade
	
	* Indicator variable named after the short label marks rows from this file.
	gen `short'=1
		
	cap format sasid %12.0f
	save `short'.dta , replace
end

* Position counter for the import loop further down; it is left empty here and
* incremented once per path inside that loop.
local shortindex

*CW 1 27 2012:  ADD NEW PATHS HERE
* pathnames is a list of quoted file paths, one per lottery text file. The
* import loop pairs each path with the short name at the same position in the
* shortnames list, so the order of entries here matters.
* The list is written in semicolon-delimited mode so it can span many lines;
* do not add comments between the compound quotes, they would become part of
* the list.

#delimit ;
global pathnames 
`"	
		"$charter\academy of the pacific rim\formatted\APR Spring 2006 matched_FUZZY_parent.txt" 
		"$charter\academy of the pacific rim\formatted\APR Spring 2007 id matched 6th_FUZZY_parent.txt" 
		"$charter\academy of the pacific rim\formatted\APR spring 2007 5th grade stata matched_FUZZY_with_fixedsasids_parent.txt" 
		"$charter\academy of the pacific rim\formatted\APR spring 2008 5th grade stata matched_FUZZY_parent.txt"		
		"$charter\academy of the pacific rim\formatted\APR Spring 2009 Lottery matched_FUZZY_parent.txt"		
		"$charter\academy of the pacific rim\formatted\APR_2010_Matched_FUZZY_parent.txt"	
		"$charter\academy of the pacific rim\formatted\Spring 2011\APR Formatted Matched RECODED_parent.txt"
		"$charter\academy of the pacific rim\formatted\Spring 2012\APR 2012 MATCHED_parent.txt"
		"$charter\academy of the pacific rim\formatted\Spring 2013\APR Sp2013 Formatted Matched Fixed_fixedsasids_parent.txt"
			
			
		"$charter\boston collegiate\formatted\Boston Collegiate Spring 2009 Lottery matched_FUZZY_with_fixedsasids_parent.txt"	
		"$charter\boston collegiate\formatted\Boston_Collegiate_2010_Matched_FUZZY_parent.txt"
				
				
		"$charter\boston prep\formatted\Boston_Prep_2010_Matched_FUZZY_with_fixedsasids_parent.txt"
		"$charter\boston prep\formatted\Spring 2012\Bos Prep 2012 formatted MATCHED_parent.txt"
		"$charter\boston prep\formatted\Spring 2013\Boston Prep Sp2013 Formatted Matched_parent.txt"

		
		"$charter\city on a hill\formatted\City on a Hill Spring 2007 Lottery - matched_FUZZY_with_fixedsasids_parent.txt"  
		"$charter\city on a hill\formatted\City on a Hill Spring 2008 Lottery matched_FUZZY_with_fixedsasids_parent.txt"		
		"$charter\city on a hill\formatted\COAH_2009_Matched_RECODED_parent.txt"
		"$charter\city on a hill\formatted\Spring2010\CH Formatted 2010 Matched_parent.txt"
		"$charter\city on a hill\formatted\Spring2011\CH 2011 Lottery Matched_parent.txt"
		"$charter\city on a hill\formatted\Spring2012\COAH Sp2012 Matched_parent.txt"  
	
	
		"$charter\codman\formatted\Codman Spring 2008 Lottery - matchednew_parent.txt"
		"$charter\codman\formatted\Codman Spring 2010 Lottery MATCHED RECODED_parent.txt"
		"$charter\codman\formatted\Spring2011\Codman Formatted Spring 2011 RECODED MATCHED_with_fixedsasids_parent.txt"
		"$charter\codman\formatted\Spring2013\Codman Sp2013 Formatted Matched_parent.txt"
		
										
		"$charter\match hs and ms\formatted\Spring2011_MS\MatchMS Spring2011 MATCHED_parent.txt"
		"$charter\match hs and ms\formatted\Spring2012_MS\Match MS 2012 Matched_parent.txt"
		"$charter\match hs and ms\formatted\Spring2013_MS\Match MS Sp2013 Formatted Matched_parent.txt"
		"$charter\match hs and ms\formatted\Spring 2014 MS\Match MS Spring 2014 Gr 6 Formatted Matched_parent.txt"
		
		
		"$charter/roxbury prep/formatted/Roxbury Prep Spring 2006 Lottery matched_FUZZY_parent.txt"
		"$charter/roxbury prep/formatted/Roxbury Prep Spring 2007 Lottery matched_FUZZY_parent.txt"
		"$charter/roxbury prep/formatted/Roxbury Prep Spring 2009 Grade 6 Lottery matched RECODED_FUZZY_parent.txt"
		"$charter\roxbury prep\formatted\Rox Prep Spring 2011 Matched_with_fixedsasids_parent.txt"
		
				

		"$charter\match es\formatted\Spring 2011\Match ES 2011 PreK Formatted Matched_parent.txt"
		"$charter\match es\formatted\Spring 2011\Match ES 2011 Gr 2 Formatted Matched Fixed_parent.txt" 
		"$charter\match es\formatted\Spring 2012\Match ES 2012 Pre K Formatted Matched Fixed_parent.txt"
		"$charter\match es\formatted\Spring 2012\Match ES 2012 Gr 2 Formatted Matched_parent.txt"
		"$charter\match es\formatted\Spring 2013\Match ES 2013 Pre K Formatted Matched_parent.txt"
		"$charter\match es\formatted\Spring 2013\Match ES 2013 Gr 2 Formatted Matched Fixed_with_fixedsasids_parent.txt" 
		
	"' ;
	
#delimit  cr

//Status on nonurban files 
* Cape Cod Lighthouse ends in 2010 (6th grade entrance)
* Four Rivers 2011 added -- Sarah formatted and matched
* Francis Parker 2011 added -- Sarah checked and rematched
* Global Learning -- cannot add w/out paper files, Sarah checked risk sets
* Innovation 2011 added -- Sarah checked and rematched
* KIPP Lynn 2011 sarah checked and rematched -- students may not be old enough yet
* Marblehead 2011 and 2012 possible to add but cohorts too young -- (start in 4th grade)
* PVPA -- cleaned up 2010
* Rising Tide -- cleaned up 2010
* Added Salem Academy 2011 (match rate above 80% but could maybe use some more work)
* Added Sturgis 2011 -- cleanedup and rematched
	
*Hampden

*CW 1 27 2012:  ADD NEW SHORTNAMES FOR NEW LOTTOS
*MAKE SURE SHORTNAMES AND PATHS MATCH UP
* One short name per entry of the pathnames global, in the same order. The
* loop below pairs them by position and stops with an error if the two lists
* do not have the same number of entries.
local shortnames  ///
	APR2006_6th APR2007_6th APR2007_5th APR2008_5th APR2009_5th APR2010_5th APR2011_5th APR2012_5th APR2013_5th ///
	BosCol2009 BosCol2010 ///
	BosPrep2010 BosPrep2012 BosPrep2013 ///
	CoaH2007 CoaH2008 CoaH2009 Coah2010 Coah2011 Coah2012 ///
	Codman2008 Codman2010 Codman2011 Codman2013 ///
	MatchMS2011 MatchMS2012 MatchMS2013 MatchMS2014 ///
	RoxPrep2006 RoxPrep2007 RoxPrep2009 RoxPrep2011 ///
	E_MatchK1_2011 E_Match2_2011 E_MatchK1_2012 E_Match2_2012 E_MatchK1_2013 E_Match2_2013

* Start the position counter explicitly so the loop does not depend on its earlier state.
local shortindex = 0
foreach p of global pathnames {
	local shortindex = `shortindex' + 1
	local item : word `shortindex' of `shortnames'
	* More paths than short names: stop instead of calling the import with an empty label.
	if "`item'" == "" {
		display as error "no short name for path number `shortindex': `p'"
		exit 198
	}
	* disp "Short=`item', long=`p'"
	inputcharter, path(`p') short(`item')
	}

* More short names than paths: the extra names would point at files that were
* not rebuilt in this run, so stop before anything is appended.
local nshort : word count `shortnames'
if `shortindex' != `nshort' {
	display as error "pathnames has `shortindex' entries but shortnames has `nshort'"
	exit 198
}



*CW 1 27 2012:  ADD NEW LABELS TO THIS LIST
*Here, add all shortnames EXCEPT FOR THE FIRST ONE (APR2006_6th).
* The first file is loaded with use below; listing it here as well would
* append it to itself and duplicate every one of its rows.
local appendnames  ///
	APR2007_6th APR2007_5th APR2008_5th APR2009_5th APR2010_5th APR2011_5th APR2012_5th APR2013_5th ///
	BosCol2009 BosCol2010 ///
	BosPrep2010 BosPrep2012 BosPrep2013 ///
	CoaH2007 CoaH2008 CoaH2009 Coah2010 Coah2011 Coah2012 ///
	Codman2008 Codman2010 Codman2011 Codman2013 ///
	MatchMS2011 MatchMS2012 MatchMS2013 MatchMS2014 ///
	RoxPrep2006 RoxPrep2007 RoxPrep2009 RoxPrep2011 ///
	E_MatchK1_2011 E_Match2_2011 E_MatchK1_2012 E_Match2_2012 E_MatchK1_2013 E_Match2_2013
		
* Stack the per-file datasets: start from the first one and append the rest.
use APR2006_6th, clear	
	foreach l of local appendnames {
		di "Going to append `l' next"
		* force lets files be combined even when a variable is string in one and numeric in another
		append using `l', force //added force for sturgis2010
	}
	
	* Drops every variable whose name starts with v.
	drop v*

*This section: 
	*	- codes offers for lottery cutoffs

* 2009 initial offers are imputed from a lottery-number cutoff for two schools:
*   D.SUN: CoaH'09 uses the 2008 year's initial offer (cutoff 175)
*   D.Sun 9/12/2013: Roxbury Prep 2009 uses 2008's initial offer (lottery number: 110)
* Lottery numbers from 1 up to the cutoff get initial_offer 1. Numbers above
* the cutoff get 0, and so do entries that cannot be read as a number, because
* missing sorts above every number.
* NOTE(review): school is compared with the labels as they are at this point
* in the file; confirm that the 2009 City on a Hill rows are spelled COAH here.
* NOTE(review): the original Roxbury note also mentions 2002-2005, but only
* 2009 is coded.
foreach sch in "COAH" "Roxbury Prep" {
	local cutoff = cond("`sch'" == "COAH", 175, 110)

	* Numeric copy of the lottery number for this school and year only.
	gen lotnum_2009 = lotterynumber if school == "`sch'" & year == 2009
	destring lotnum_2009, replace force

	replace initial_offer = 1 if school == "`sch'" & year == 2009 & inrange(lotnum_2009, 1, `cutoff')
	replace initial_offer = 0 if school == "`sch'" & year == 2009 & (lotnum_2009 > `cutoff')

	drop lotnum_2009
}


/***********SCHOOLS WITH ADDITIONAL RISK SETS***************

* EMS added 8/8/2013. The new schools have additional variables - for now we are not looking at these, but might want to add them in later
	* 1st, 2nd, and 3rd choices, and offers for Uncommon schools applicants

/* Create Risk Sets within Schools EMS 8-19-2013 */	
 destring prioritygroupnumber, replace
 rename prioritygroupnumber applyprioritygroup /* Prioritygroup is for BGA, UPAcademy */
 replace applyprioritygroup = 2 if offerredadmission2ndlotteryever=="1" /* This is the second lottery for EdBrooke3 2012 */
 replace applyprioritygroup = 2 if secondlottery=="1" /* this is Excel I 2009 second lottery. no one got offers */
 replace applyprioritygroup =0 if applyprioritygroup==.
 
 drop offerredadmission2ndlotteryever secondlottery
 
 * for 2014 UCS
	foreach off in initialoffer everoffer {
		destring updated`off'*, replace
		replace updated`off'gh=`off'ls if year==2014 & `off'ls!=.
		replace updated`off'roxp=`off'mh if year==2014 & `off'mh!=.
		replace updated`off'dp=`off'dp if year==2014 & `off'dp!=.
		
		drop `off'ls `off'mh `off'dp
	}	
		
 * for 2012, 2013, and 2014 UCS
	 foreach v in roxp gh dp {
		replace initial_offer = updatedinitialoffer`v' if (updatedinitialoffer`v' !=. & updatedinitialoffer`v' != 0)
		replace offer = updatedeveroffer`v' if (updatedeveroffer`v' !=. & updatedeveroffer`v' != 0)
	}
	gen initial_offerRoxPrep_corr = updatedinitialofferroxp
	gen offerRoxPrep_corr = updatedeverofferroxp
	gen initial_offerGroveH_corr = updatedinitialoffergh
	gen offerGroveH_corr = updatedeveroffergh
	gen initial_offerDP = updatedinitialofferdp
	gen offerDP = updatedeverofferdp
	foreach v in roxp gh dp {	
		drop updatedinitialoffer`v'
		drop updatedeveroffer`v'
	}	

	foreach off in initial_offer offer {
		gen `off'Uncommon = 1 if (`off'RoxPrep_corr==1 | `off'GroveH_corr==1 | `off'DP==1)
			replace `off'Uncommon = 0 if `off'Uncommon==. & school=="UncommonSchools"
	}

**********SCHOOLS WITH ADDITIONAL RISK SETS***************
***Global learning has multiple lotteries, but cutoffs for initial within each lottery and ever cutoff in 2nd lottery

*CW 1 27 2012:  CHECK FOR ADDITIONAL RISK SETS
*2006
gen risk_GL2006_1stlotto=0 /*if school=="Global Learning" &year==2006*/
	replace risk_GL2006_1stlotto=1 if school=="Global Learning" &year==2006&lotterybatch==1
gen risk_GL2006_2ndlotto=0 /*if school=="Global Learning" &year==2006*/
	replace risk_GL2006_2ndlotto=1 if school=="Global Learning" &year==2006&lotterybatch==2
*2007
gen risk_GL2007_1stlotto=0 /*if school=="Global Learning" &year==2007*/
	replace risk_GL2007_1stlotto=1 if school=="Global Learning" &year==2007&lotterybatch==1
gen risk_GL2007_2ndlotto=0 /*if school=="Global Learning" &year==2007*/
	replace risk_GL2007_2ndlotto=1 if school=="Global Learning" &year==2007&lotterybatch==2
gen risk_GL2007_3rdlotto=0 /*if school=="Global Learning" &year==2007*/
	replace risk_GL2007_3rdlotto=1 if school=="Global Learning" &year==2007&lotterybatch==3
*2009:  nobody in 2nd or 3rd lotteries offered
gen risk_GL2009_1stlotto=0 /*if school=="Global Learning" &year==2007*/
	replace risk_GL2009_1stlotto=1 if school=="Global Learning" &year==2009&lotterybatch==1
gen risk_GL2009_2ndlotto=0 /*if school=="Global Learning" &year==2009*/
	replace risk_GL2009_2ndlotto=1 if school=="Global Learning" &year==2009&lotterybatch==2
gen risk_GL2009_3rdlotto=0 /*if school=="Global Learning" &year==2007*/
	replace risk_GL2009_3rdlotto=1 if school=="Global Learning" &year==2009&lotterybatch==3
cap drop lotterybatch


***Innovation has "separate" lotteries for "Local (chelmsford" and "out of town" -- but out of towners do get in
*2007
gen risk_IA2007_local=0 /*if  school=="Innovation Academy" &year==2007*/
	replace risk_IA2007_local=1 if outofarea==0 &  school=="Innovation Academy" &year==2007
gen risk_IA2007_outoftown=0  /*if school=="Innovation Academy" &year==2007*/
	replace risk_IA2007_outoftown=1 if outofarea==1 &  school=="Innovation Academy" &year==2007
replace outofarea=. if   school=="Innovation Academy" &year==2007
*2008
gen risk_IA2008_local=0 /*if school=="Innovation Academy" &year==2008*/
	replace risk_IA2008_local=1 if outofarea==0 &  school=="Innovation Academy" &year==2008
gen risk_IA2008_outoftown=0 /*if school=="Innovation Academy" &year==2008*/
	replace risk_IA2008_outoftown=1 if outofarea==1 &  school=="Innovation Academy" &year==2008
replace outofarea=. if   school=="Innovation Academy" &year==2008
*2009
gen risk_IA2009_local=0 /*if school=="Innovation Academy" &year==2008*/
	replace risk_IA2009_local=1 if outofarea==0 &  school=="Innovation Academy" &year==2009
gen risk_IA2009_outoftown=0 /*if school=="Innovation Academy" &year==2008*/
	replace risk_IA2009_outoftown=1 if outofarea==1 &  school=="Innovation Academy" &year==2009
replace outofarea=. if   school=="Innovation Academy" &year==2009
*2010
gen risk_IA2010_local=0 
	replace risk_IA2010_local=1 if outofarea==0 &  school=="Innovation Academy" &year==2010
gen risk_IA2010_outoftown=0 
	replace risk_IA2010_outoftown=1 if outofarea==1 &  school=="Innovation Academy" &year==2010
replace outofarea=. if   school=="Innovation Academy" &year==2010
*2011
gen risk_IA2011_local=0 
	replace risk_IA2011_local=1 if outofarea==0 &  school=="Innovation Academy" &year==2011
gen risk_IA2011_outoftown=0 
	replace risk_IA2011_outoftown=1 if outofarea==1 &  school=="Innovation Academy" &year==2011
replace outofarea=. if   school=="Innovation Academy" &year==2011


***Marblehead has "separate" lotteries for "Local (Marblehead)" and "out of town" -- but out of towners do get in
*2005
gen risk_Marble2005_local=0 /*if school=="Marblehead Community MS" &year==2005*/
	replace risk_Marble2005_local=1 if outofarea==0 &  school=="Marblehead Community MS" &year==2005
gen risk_Marble2005_outoftown=0 /*if school=="Marblehead Community MS" &year==2005*/
	replace risk_Marble2005_outoftown=1 if outofarea==1 &  school=="Marblehead Community MS" &year==2005
replace outofarea=. if   school=="Marblehead Community MS" &year==2005
*2006
gen risk_Marble2006_local=0 /*if school=="Marblehead Community MS" &year==2006*/
	replace risk_Marble2006_local=1 if outofarea==0 &  school=="Marblehead Community MS" &year==2006
gen risk_Marble2006_outoftown=0 /*if school=="Marblehead Community MS" &year==2006*/
	replace risk_Marble2006_outoftown=1 if outofarea==1 &  school=="Marblehead Community MS" &year==2006
replace outofarea=. if   school=="Marblehead Community MS" &year==2006
*2007
gen risk_Marble2007_local=0 /*if school=="Marblehead Community MS" &year==2007*/
	replace risk_Marble2007_local=1 if outofarea==0 &  school=="Marblehead Community MS" &year==2007
gen risk_Marble2007_outoftown=0 /*if school=="Marblehead Community MS" &year==2007*/
	replace risk_Marble2007_outoftown=1 if outofarea==1 &  school=="Marblehead Community MS" &year==2007
replace outofarea=. if   school=="Marblehead Community MS" &year==2007
*2008
gen risk_Marble2008_local=0 /*if school=="Marblehead Community MS" &year==2008*/
	replace risk_Marble2008_local=1 if outofarea==0 &  school=="Marblehead Community MS" &year==2008
gen risk_Marble2008_outoftown=0 /*if school=="Marblehead Community MS" &year==2008*/
	replace risk_Marble2008_outoftown=1 if outofarea==1 &  school=="Marblehead Community MS" &year==2008
replace outofarea=. if   school=="Marblehead Community MS" &year==2008
*2009
gen risk_Marble2009_local=0 /*if school=="Marblehead Community MS" &year==2009*/
	replace risk_Marble2009_local=1 if outofarea==0 &  school=="Marblehead Community MS" &year==2009
gen risk_Marble2009_outoftown=0 /*if school=="Marblehead Community MS" &year==2009*/
	replace risk_Marble2009_outoftown=1 if outofarea==1 &  school=="Marblehead Community MS" &year==2009
replace outofarea=. if   school=="Marblehead Community MS" &year==2009
*/

*CW 1 27 2012:  CHECK SCHOOL NAMES FOR NEW COHORTS
* Collapse the spellings found in the source files to one short code per school.
* Each line lists every spelling that maps to the code on its left.

* Boston middle and high schools
replace school="APR" if inlist(school, "Academy of the Pacific Rim", "ACADEMY OF THE PACIFIC RIM")
replace school="BosCol" if inlist(school, "Boston Collegiate", "Boston Collegiate Charter School", "Boston Collegiate MS", "BCCS", "BOSTON COLLEGIATE")
replace school="BGA" if inlist(school, "Boston Green Academy", "BOSTON GREEN ACADEMY", "BGA")
replace school="BosPrep" if inlist(school, "Boston Prep MS", "Boston Prep", "Boston Preparatory Charter Public School")
replace school="COAH" if inlist(school, "City on a Hill", "CITY ON A HILL", "City on a Hill I")
replace school="COAHII" if inlist(school, "City on a Hill II", "City on a Hill Charter School Dudley")
replace school="Codman" if inlist(school, "Codman Academy", "CODMAN ACADEMY", "Codman Charter School", "Codman School")
replace school="DCA" if school=="Dorchester Collegiate"
replace school="EdBrooke" if inlist(school, "Edward Brooke", "EDWARD BROOKE ROSLINDALE CHARTER SCHOOL", "EDWARDS BROOKE ROSLINDALE CHARTER SCHOOL", "Edward Brooke Roslindale Charter School")
replace school="EdBrooke2" if inlist(school, "EDWARD BROOKE 2 CHARTER SCHOOL", "Edward Brooke 2 Charter School")
replace school="EdBrooke3" if inlist(school, "BROOKE EAST BOSTON", "Edward Brooke III", "Edward Brooke III (East Boston)")
replace school="Excel" if inlist(school, "Excel Academy", "EXCEL ACADEMY", "Excel East Boston", "Excel I East Boston", "Excel Academy I (East Boston)")
replace school="Excel3" if inlist(school, "Excel III Oriental Heights", "Excel Academy III (Orient Heights)")
replace school="GroveH" if school=="GROVE HALL PREP"
replace school="KIPP_BOS" if school=="KIPP Academy Boston"
replace school="MATCH_HS" if inlist(school, "MATCH High School", "MATCH HS", "Match HS", "MATCH HIGH SCHOOL")
replace school="MATCH_MS" if inlist(school, "MATCH Middle School", "MATCH MS", "Match MS", "MATCH MIDDLE SCHOOL", "Match Middle School")
replace school="RoxPrep" if inlist(school, "Roxbury Preparatory", "Roxbury Prep", "ROXBURY PREP CHARTER SCHOOL")
replace school="UncommonSchools" if school=="Uncommon Schools"
replace school="UPAcademy" if inlist(school, "UP Academy", "UP ACADEMY", "UP Academy Boston")

* Boston elementary schools
replace school="BridgeB" if school=="Bridge Boston Charter School"
replace school="Conserv" if school=="Conservatory Lab Charter School"
replace school="MATCH_ES" if inlist(school, "March Community Day School", "Match Community Day School")
replace school="NHCS" if school=="Neighborhood House Charter School"


* Schools outside Boston
replace school="FourRiv" if inlist(school, "Four Rivers Charter", "Four Rivers", "FOUR RIVERS CHARTER SCHOOL")
replace school="Parker" if inlist(school, "Francis Parker Essential School", "Francis Parker", "FRANCIS W PARKER CHARTER ESSENTIAL")
replace school="Innovation" if inlist(school, "Innovation Academy", "INNOVATION ACADEMY")
replace school="KIPP_Lynn" if inlist(school, "KIPP Lynn", "KIPP", "KIPP LYNN MIDDLE SCHOOL")
replace school="Marblehead" if school=="Marblehead Community MS"
replace school="PVPA" if inlist(school, "Pioneer Valley Performing Arts", "Pioneer Valley")
replace school="Global" if inlist(school, "Global Learning", "Global Learning MS")
replace school="CapeCod" if inlist(school, "Cape Cod Lighthouse", "Cape Cod")
replace school="RisingTide" if school=="Rising Tide"
replace school="SalemAc" if inlist(school, "Salem Academy Charter Middle School", "SALEM ACADEMY CHARTER")
replace school="Sturgis" if school=="STURGIS CHARTER PUBLIC SCHOOL"

* List the resulting codes so any spelling that was not mapped stands out.
tab school
 
* boston_lottery: 1 for the Boston school codes, 0 otherwise. The list is split
* over three inlist calls because inlist takes at most nine string values to
* compare against.
gen byte boston_lottery = inlist(school, "APR", "BosCol", "BGA", "BosPrep", "COAH", "COAHII", "Codman", "DCA", "EdBrooke") ///
	| inlist(school, "EdBrooke2", "EdBrooke3", "Excel", "Excel3", "GroveH", "KIPP_BOS", "MATCH_HS", "MATCH_MS", "RoxPrep") ///
	| inlist(school, "UncommonSchools", "UPAcademy", "BridgeB", "Conserv", "MATCH_ES", "NHCS")

* Marks rows that come from the newly imported files.
gen byte from_new=1 

* urban_lottery: every Boston school plus three other school codes.
*add excel 2 here from chelsea SRC
gen urban_lottery = (boston_lottery==1) | inlist(school, "Global", "KIPP_Lynn", "SalemAc")

* Complements of the two flags above.
gen notboston_lottery = (boston_lottery==0)
*ChrisP:
gen noturban_lottery = (urban_lottery==0)


	// drop if sasid == 1013995811 & grade==5   
	// duplicates drop will probably need to add this in somewhere else

save "$save/new_BU_lottofiles_parent3_mnameTEST_V2.dta", replace

}

if "${bostondata}"=="1"{
* Load the old master lottery file and reduce it to the columns used below.
* NOTE(review): the documented option of use for discarding the data in memory
* is clear; replace is kept exactly as written - confirm it is accepted.
use  "$charter/Full Charter Lottery Master File June 2010_withcorrections.dta", replace

* Remove the APR rows and the Roxbury Prep rows from 2006 on, which are covered
* by the newly imported files.
* NOTE(review): the APR label here has no "the" (Academy of Pacific Rim);
* presumably that is how the old file spells it - confirm.
drop if school=="Academy of Pacific Rim" | (school=="Roxbury Prep" & applicationyear>=2006)

rename applicationyear year
rename gradeapplying grade
destring grade, replace force
destring sasid, replace

* no zipcode available
keep sasid streetaddress townofresidence parentslastname parentsfirstname birthdate ///
	outofboston ndlotto lotterynumber waitlistnumber basedonschool cantmatch ///
	offeredadmission siblingstatus school year grade offeredadmissioninitianlly

* Every row in this file is flagged as a Boston lottery.
gen boston_lottery=1

* Late applicants are marked with an x in ndlotto; any non-empty outofboston
* entry marks an out-of-area applicant.
gen lateapplicant = (ndlotto=="x")
gen outofarea = (outofboston~="")
drop outofboston ndlotto

* basedonschool: YES or Yes becomes 1; anything else non-numeric becomes missing.
replace basedonschool="1" if inlist(basedonschool, "YES", "Yes")
destring basedonschool, replace force

*sibling stuff
* One lottery-number entry carries the sibling note instead of siblingstatus.
replace siblingstatus="Yes" if lotterynumber=="259 (sibling)"

* sibling: 1 when siblingstatus is one of the spellings that mean a sibling is
* enrolled. Two inlist calls because inlist takes at most nine string values.
gen sibling = inlist(siblingstatus, "Apllied/Enrolled", "Applied/Enrolled", "Enrolled", "Enrolled in 6th", ///
		"Enrolled in 7th grade", "Yes", "applied/enrolled", "applying/Enrolled", "enrolled") ///
	| inlist(siblingstatus, "sibling enrolled", "x", "SIBLING", "y", "YES")

* siblingapplying: 1 when siblingstatus is one of the spellings that mean a
* sibling is applying. Trailing blanks inside some values are part of the data.
gen siblingapplying = inlist(siblingstatus, "7th grade applicant", "Applying", "Both labelled 153 ", "applying", ///
		"applying for grade 5", "applying for grade 6 ", "applying for grade 6", ///
		"applying for grade 6 and grade 7", "applying for grade 7") ///
	| inlist(siblingstatus, "applying for grade 8", "applyinh", "sibling applying")

drop siblingstatus
**********************
*OFFER VARIABLES
*************************
* Clean up lottery number and waitlist number.
* A few entries carry text; rewrite them before converting to numbers.
replace lotterynumber = "400" if lotterynumber == "383/400 double entry"
replace lotterynumber = "259" if lotterynumber == "259 (sibling)"
replace lotterynumber = " "   if lotterynumber == "late application"

* force: anything still non-numeric becomes missing
destring waitlistnumber, generate(waitnum) force
destring lotterynumber,  generate(lotnum)  force

label variable lotnum  "Numeric Lottery Number"
label variable waitnum "Numeric Waitlist Number"

* blank and "REALLY IN 6TH" entries are both recoded to "No"
replace offeredadmission = "No" if inlist(offeredadmission, "", "REALLY IN 6TH")

*EVER OFFER
tab offeredadmission
gen offer = 0
replace offer = 1 if inlist(offeredadmission, "Yes", "yes", "1")
*Note:  Brooke messed up. All should be offered according to lottery log
replace offer = 1 if school == "Edward Brooke" & year == 2006
* Initial (lottery-day) offer: within each school-year cell listed below,
* initial_offer = 1 when the numeric lottery number falls inside the cutoff
* range and 0 otherwise; cells not listed stay missing.
* inrange() returns 0 when lotnum is missing, so missing numbers get 0.
gen initial_offer=.

* Boston Collegiate
replace initial_offer = inrange(lotnum, 1, 40)  if school=="Boston Collegiate" & year==2002
replace initial_offer = inrange(lotnum, 1, 66)  if school=="Boston Collegiate" & year==2003
replace initial_offer = inrange(lotnum, 1, 66)  if school=="Boston Collegiate" & year==2004
replace initial_offer = inrange(lotnum, 1, 66)  if school=="Boston Collegiate" & year==2005
replace initial_offer = inrange(lotnum, 1, 66)  if school=="Boston Collegiate" & year==2006

* Boston Prep
replace initial_offer = inrange(lotnum, 1, 98)  if school=="Boston Prep" & year==2005
replace initial_offer = inrange(lotnum, 1, 80)  if school=="Boston Prep" & year==2006
replace initial_offer = inrange(lotnum, 1, 100) if school=="Boston Prep" & year==2007

* Edward Brooke 2006 (no lower bound in this cell)
replace initial_offer = (lotnum<=39 & lotnum!=.) if school=="Edward Brooke" & year==2006

*Edit by Chris 5/24/10:  Add additional MATCH as per e-mail from Julia Manoli April 14 2010 (2005 and 2006)
* MATCH
replace initial_offer = inrange(lotnum, 1, 61)  if school=="MATCH" & year==2002
replace initial_offer = inrange(lotnum, 1, 72)  if school=="MATCH" & year==2003
replace initial_offer = inrange(lotnum, 1, 70)  if school=="MATCH" & year==2004
replace initial_offer = inrange(lotnum, 1, 70)  if school=="MATCH" & year==2005
replace initial_offer = inrange(lotnum, 1, 65)  if school=="MATCH" & year==2006

* Health Careers (hca)
replace initial_offer = inrange(lotnum, 1, 48)  if school=="Health Careers" & year==2003
replace initial_offer = inrange(lotnum, 1, 88)  if school=="Health Careers" & year==2004
* NOTE(review): the lower bound is 2 in this cell only - confirm it is intended
replace initial_offer = inrange(lotnum, 2, 55)  if school=="Health Careers" & year==2005
replace initial_offer = inrange(lotnum, 1, 70)  if school=="Health Careers" & year==2006

* City on a Hill (coah)
replace initial_offer = inrange(lotnum, 1, 100) if school=="City on a Hill" & year==2002
replace initial_offer = inrange(lotnum, 1, 70)  if school=="City on a Hill" & year==2004
replace initial_offer = inrange(lotnum, 1, 200) if school=="City on a Hill" & year==2005
replace initial_offer = inrange(lotnum, 1, 225) if school=="City on a Hill" & year==2006

* Codman
replace initial_offer = inrange(lotnum, 1, 41)  if school=="Codman" & year==2004

* The raw offer / lottery-number / waitlist fields are dropped now that
* offer and initial_offer exist.
drop offered* lot* wait*

* Recode the long school names to short codes
replace school = "BosCol"   if school == "Boston Collegiate"
replace school = "BosPrep"  if school == "Boston Prep"
replace school = "COAH"     if school == "City on a Hill"
replace school = "MATCH_HS" if school == "MATCH"
replace school = "RoxPrep"  if school == "Roxbury Prep"
replace school = "EdBrooke" if school == "Edward Brooke"


*As of July 18 as per Josh, drop HCA
* ("Health Careers" spelling added by EMS 8-19-2013)
drop if inlist(school, "HCA", "Health Careers")

* show the full SASID rather than scientific notation
format sasid %12.0f
gen from_old = 1

*KP added 7/14/20
rename parentslastname  parent1lastname
rename parentsfirstname parent1firstname

save old_lottofiles_parent3.dta, replace


}

*** combine files, last chance at matching
* Stack the old Boston file under the new file. force lets the append proceed
* when a variable is string in one file and numeric in the other.
use "new_BU_lottofiles_parent3_mnameTEST_V2.dta", clear
append using "$save\old_lottofiles_parent3.dta", force
quietly compress

* obs is the row identifier used as the merge key by the matching steps below
generate obs = _n
save "appended_parent3_mnameTEST_V2.dta", replace 


*Final attempt to match any remaining unmatched
if "$match"=="1"{
* Build the list of applicants that still have no SASID.
* FIX: read the appended file written just above (..._V2), which is also the
* file the cleanup section merges results back into by obs. The original read
* "appended_parent3_mnameTEST.dta" (no _V2); obs = _n in a different file need
* not identify the same applicants. `clear' is added so the load cannot fail
* on unsaved data in memory.
use "$save/appended_parent3_mnameTEST_V2.dta", clear
keep obs sasid  caplast capfirst year grade mname  
keep if sasid==.

* Standardise the name fields: upper case, trimmed, punctuation removed
replace caplast=trim(upper(caplast))
replace capfirst=trim(upper(capfirst))
tostring mname, replace
replace mname=trim(upper(mname))
ren caplast lastname
ren capfirst firstname
replace lastname=subinstr(lastname,", JR.","",.)
replace lastname=subinstr(lastname,"'","",.)
replace lastname=subinstr(lastname," ","",.)
foreach vars in firstname mname lastname {
	replace `vars' = subinstr(`vars',"`","",.)
	replace `vars' = subinstr(`vars',`"""',"",.)
}

* Shift the application year forward: +1 for grades up to 8, +2 for grade 9
replace year = year+1  if grade<=8
replace year = year+2 if grade==9 //testyear
g schoolyr=year

sort obs

* keep the applied-for grade under its own name; grade is altered/dropped later
gen entrygrade = grade 

save "$save/nosasid_parent3.dta", replace

* Three exact-name lookups (shifted year, application year, middle name),
* each saved to a tempfile keyed by obs.
*
* NOTE(review): nosasid_parent3.dta already contains sasid (all missing).
* -merge- without the update option keeps the master's values for variables
* present in both files, so if the uniquenames files store their ID as sasid
* it is not copied in by these merges - confirm against those files.

*Change by CT 5/18/10 - Must be after the initial save so that "$save/nosasid.dta" preserves all records
* names that are not unique within year/grade cannot be merged 1:1
duplicates tag lastname firstname year grade, gen(dup)
drop if dup>=1
drop dup
sort year grade lastname firstname 

merge 1:1 year grade lastname firstname using "${dir}/uniquenames.dta" , keep(1 3)
drop _merge
drop year grade
sort obs
tempfile thisyear
save "`thisyear'"

use "$save/nosasid_parent3.dta", clear
* Undo the year shift and step back one grade.
* FIX: the original wrapped these in `if grade==9 { }' / `if grade<=8 { }'.
* That is the -if- programming command, which evaluates the expression once,
* for the first observation only, so every row got the same adjustment.
* The if qualifier applies the condition row by row.
replace year=year-2 if grade==9
replace year=year-1 if grade<=8
replace grade=grade-1

*Change by CT 5/18/10 - Must be repeated to eliminate duplicates
duplicates tag year grade lastname firstname, gen(dup)
drop if dup>=1
drop dup
sort year grade lastname firstname 

merge 1:1 year grade lastname firstname using "${dir}/uniquenames.dta" , keep(1 3)
drop _merge
drop year grade
sort obs
tempfile lastyear
save "`lastyear'"

*now with middle name 
use "$save/nosasid_parent3.dta", clear
duplicates tag lastname firstname mname , gen(dup)
drop if dup>=1
drop dup
merge 1:1 lastname firstname mname  using "${dir}/uniquenames_middle.dta" , keep(1 3)
drop _merge
drop year grade
sort obs
tempfile mname
save "`mname'"

* Fold the three lookup results back onto the full no-SASID list, one row per
* obs. The later merges use `update replace', so a non-missing value from
* `thisyear' and then from `mname' overrides what is already there.
* NOTE(review): the first merge has no update option, so for variables present
* in both files (sasid included) the master's values are kept - confirm this
* is intended for the `lastyear' lookup.
use "$save/nosasid_parent3.dta", clear
drop if obs==.
merge 1:1 obs using "`lastyear'", nogenerate
drop if obs==.
merge 1:1 obs using "`thisyear'", update replace nogenerate
drop if obs==.
merge 1:1 obs using "`mname'", update replace nogenerate
drop if obs==.

keep  obs lastname firstname mname  entrygrade sasid schoolyr
rename sasid statasasid
sort obs
** Save the unique matches file
save "$save\allremaining_parent3", replace


/* ***************************************************************************************************************************
	Fuzzy Match*/

*************************************************************************************************************************** 
* Only keep obs w/o matched sasids
drop if statasasid != . 


******************************************************************************************************************
*********** Pass (a): strip - and ' and spaces from names; an identical name with a consistent year/grade
*********** is then accepted as a match.
******************************************************************************************************************
* Flag applicant names that contain any of the three characters, then strip them
gen trimmed_formatted = 1 if strpos(firstname,"-")>0 | strpos(firstname," ")>0 | strpos(firstname,"'")>0 | ///
	strpos(lastname,"-")>0 | strpos(lastname," ")>0 | strpos(lastname,"'")>0
foreach namevar in firstname lastname {
	replace `namevar' = subinstr(subinstr(subinstr(`namevar', "-", "", .), " ", "", .), "'", "", .)
}
sort obs
tempfile temp_formatted
save `temp_formatted'


* Same treatment for the names in fuzzymatchnames.dta
use "${dir}\fuzzymatchnames.dta", clear

gen trimmed_SIMS = 1 if strpos(firstname,"-")>0 | strpos(firstname," ")>0 | strpos(firstname,"'")>0 | ///
	strpos(lastname,"-")>0 | strpos(lastname," ")>0 | strpos(lastname,"'")>0
foreach namevar in firstname lastname {
	replace `namevar' = subinstr(subinstr(subinstr(`namevar', "-", "", .), " ", "", .), "'", "", .)
}
sort ma_obs
tempfile temp_SIMS
save `temp_SIMS'

use `temp_formatted'

reclink lastname firstname using `temp_SIMS', idmaster(obs) idusing(ma_obs) gen(matchqual) required(lastname firstname) orblock(lastname firstname)

* only pairs where stripping changed a name on at least one side
keep if trimmed_formatted == 1 | trimmed_SIMS == 1

	* year and grade must be offset from the expected entry year/grade by the
	* same number of years (6 back through 7 forward)
	gen exactyr_match = .
	forvalues shift = -6/7 {
		replace exactyr_match = 1 if year == schoolyr + (`shift') & grade == entrygrade + (`shift')
	}

// Consider it a match if they are the right age and had an exact match on first and last name without hyphens, spaces, and apostrophes
keep if exactyr_match == 1 & matchqual == 1
capture duplicates drop obs sasid, force // You will be able to catch if it's not unique by obs and sasid later

* we'll also drop the observations that match with multiple sasids, leaving them to handmatching/fuzzy matching
bysort obs: gen obs_ct = _N
capture drop if obs_ct > 1
capture drop obs_ct 

sort obs
keep obs lastname firstname mname dob sasid
save "${save}\allremaining_fuzzy_compact_a_parent3", replace


******************************************************************************************************************
*********** Pass (b): remove a trailing JR / JR. and accept a match if first and last names are then
*********** identical and the year/grade is consistent.
* JRs occur in both datasets, so each side is tagged before stripping and a pair is kept only if at least
* one side actually ended in JR or JR.
*
* FIX: the original called subinstr(name,"JR","",length(lastname)-2). The 4th argument of subinstr() is
* the NUMBER of occurrences to replace, not a position, so "JR" was removed anywhere in the name, and the
* count came from lastname even when firstname was being cleaned (a last name of two characters or fewer
* gave a count <= 0). The suffix is now removed only from the end of the name, identically in both files.
* Remaining periods are still removed, as the original did; the result is trimmed so that "SMITH JR"
* becomes "SMITH" rather than "SMITH ".
******************************************************************************************************************
use "${save}\allremaining_parent3", clear
sort obs
* Only keep obs w/o matched sasids
drop if statasasid != . 

* Tag names ending in JR or JR. before altering them
gen JR_formatted = 1 if substr(firstname,-2,2) == "JR" | substr(firstname,-3,3) == "JR." | ///
	substr(lastname,-2,2) == "JR" | substr(lastname,-3,3) == "JR."

foreach var1 in firstname lastname {
	replace `var1' = substr(`var1', 1, length(`var1')-3) if substr(`var1',-3,3) == "JR."
	replace `var1' = substr(`var1', 1, length(`var1')-2) if substr(`var1',-2,2) == "JR"
	replace `var1' = trim(subinstr(`var1', ".", "", .))
}
sort obs
tempfile tempJR_formatted
save `tempJR_formatted'


*** NOW REPEAT FOR THE SIMS DATA
	use "${dir}\fuzzymatchnames.dta", clear

	gen JR_SIMS = 1 if substr(firstname,-2,2) == "JR" | substr(firstname,-3,3) == "JR." | ///
		substr(lastname,-2,2) == "JR" | substr(lastname,-3,3) == "JR."

	foreach var1 in firstname lastname {
		replace `var1' = substr(`var1', 1, length(`var1')-3) if substr(`var1',-3,3) == "JR."
		replace `var1' = substr(`var1', 1, length(`var1')-2) if substr(`var1',-2,2) == "JR"
		replace `var1' = trim(subinstr(`var1', ".", "", .))
	}
	sort ma_obs
	tempfile tempJR_SIMS
	save `tempJR_SIMS'

   use `tempJR_formatted'

reclink lastname firstname using `tempJR_SIMS', idmaster(obs) idusing(ma_obs) gen(matchqual) required(lastname firstname) orblock(lastname firstname)

* only pairs where a JR suffix was present on at least one side
keep if JR_formatted == 1 | JR_SIMS == 1

	* year and grade must be offset from the expected entry year/grade by the same number of years
	gen exactyr_match = .
	replace exactyr_match = 1 if year == schoolyr & grade == entrygrade
	forvalues i = 1(1)6 {
		replace exactyr_match = 1 if year == schoolyr-`i' & grade == entrygrade - `i'
	}
	forvalues i = 1(1)7 {
		replace exactyr_match = 1 if year == schoolyr+`i' & grade == entrygrade + `i'
	}

// Consider it a match if they are the right age and had an exact match on first and last name once the JR suffix is removed
keep if exactyr_match == 1 & matchqual == 1
capture duplicates drop obs sasid, force // You will be able to catch if it's not unique by obs and sasid later 

* we'll also drop the observations that match with multiple sasids, leaving them to handmatching/fuzzy matching
bys obs: gen obs_ct = _N
capture drop if obs_ct > 1
capture drop obs_ct 
sort obs
keep obs lastname firstname mname dob sasid
save "${save}\allremaining_fuzzy_compact_b_parent3", replace

******************************************************************************************************************
*********** Continue with Fuzzy Matching *************************************************************************
******************************************************************************************************************

use "${save}\allremaining_parent3", clear
sort obs
* Only keep obs w/o matched sasids
drop if statasasid != . 

* Remove the names just matched by removing hyphens/spaces/apostrophes
	merge 1:1 obs using "${save}\allremaining_fuzzy_compact_a_parent3"
	drop if _merge == 3
	drop _merge
	
* Remove the names just matched by removing the JR suffix
	merge 1:1 obs using "${save}\allremaining_fuzzy_compact_b_parent3"
	drop if _merge == 3
	drop _merge
	
drop sasid statasasid
	
foreach vars in lastname firstname mname {
	replace `vars' = subinstr(`vars',"`","",.)
}
sort obs
* Unique observation counter in fuzzymatchnames.dta is called ma_obs
*reclink lastname firstname mname dob using "${dir}\fuzzymatchnames.dta", idmaster(obs) idusing(ma_obs) gen(matchqual) wmatch(10 10 2 2)
*not using DOB or mname here
reclink lastname firstname using "${dir}\fuzzymatchnames.dta", idmaster(obs) idusing(ma_obs) gen(matchqual) wmatch(10 10 )

* Only keep matches with reasonable year ranges. For middle school - want to be within 1 year of the correct entry. 
	* For high school - want to be within 2 years of the correct entry year.

	* exact: year and grade are offset from the expected entry by the same number of years
	gen exactyr_match = .
	replace exactyr_match = 1 if year == schoolyr & grade == entrygrade
	forvalues i = 1(1)6 {
		replace exactyr_match = 1 if year == schoolyr-`i' & grade == entrygrade - `i'
	}
	forvalues i = 1(1)7 {
		replace exactyr_match = 1 if year == schoolyr+`i' & grade == entrygrade + `i'
	}
	
	gen reasonableyr_match = . // If off by one year
		forvalues i = 0(1)6 {
			// Grade is one lower than expected for that year
			replace reasonableyr_match = 1 if year == schoolyr-`i' & grade == entrygrade - `i' - 1
			
			// Grade is one higher than expected for that year
			replace reasonableyr_match = 1 if year == schoolyr-`i' & grade == entrygrade - `i' + 1	
			
		}
		
		forvalues i = 1(1)7 {
			// Grade is one lower than expected for that year
			replace reasonableyr_match = 1 if year == schoolyr + `i' & grade == entrygrade + `i' - 1
			
			// Grade is one higher than expected for that year
			replace reasonableyr_match = 1 if year == schoolyr + `i' & grade == entrygrade + `i' + 1			
		}
		
	* Off by two years - only allowed for applicants with entrygrade >= 8.
	* FIX: the original wrapped these loops in `if entrygrade>=8 { }'. That is the
	* -if- programming command, which tests the first observation only, so the
	* two-year rule was switched on or off for the whole file at once. The
	* condition is now part of each replace. Missing entrygrade is excluded
	* explicitly because missing compares greater than any number.
	gen twoyr_match = .
		forvalues i = 0(1)6 {
			// Grade is two lower than expected for that year
			replace twoyr_match = 1 if entrygrade>=8 & entrygrade<. & year == schoolyr-`i' & grade == entrygrade - `i' - 2
			
			// Grade is two higher than expected for that year
			replace twoyr_match = 1 if entrygrade>=8 & entrygrade<. & year == schoolyr-`i' & grade == entrygrade - `i' + 2			
		}
		
		forvalues i = 1(1)7 {
			// Grade is two lower than expected for that year
			replace twoyr_match = 1 if entrygrade>=8 & entrygrade<. & year == schoolyr + `i' & grade == entrygrade + `i' - 2
			
			// Grade is two higher than expected for that year
			replace twoyr_match = 1 if entrygrade>=8 & entrygrade<. & year == schoolyr + `i' & grade == entrygrade + `i' + 2			
		}
	
	* drop if there is no reasonable match of years
	drop if exactyr_match == . & reasonableyr_match == . & twoyr_match == .
	

save "${save}\allremaining_fuzzy_full_parent3", replace 
use "${save}\allremaining_fuzzy_full_parent3", clear 

* Only keep the non duplicate observations to investigate:

* duplicates drop obs sasid, force - I removed this because we want to see if the same kid has the same sasid for many years and then
* one year where he changes sasids
* The point of this command was originally:
	* To reduce the number of observations for each person. This will give us just one observation per obs and sasid so if it is assigning the same 
	* sasid to all of the observations, STATA will arbitrarily just show one of them. 
gen keepsasid = .

*unique obs sasid pairs
* FIX: the original ran `gsort -matchqual exactyr_match reasonableyr_match' and then
* `bys obs sasid:', which sorts again by obs sasid with no guaranteed order inside
* each pair, so the row kept was arbitrary. The ordering is now part of the bysort:
* within each obs-sasid pair the row with the highest matchqual comes first, then
* exactyr_match == 1 ahead of missing, then reasonableyr_match == 1 ahead of missing.
gen double neg_matchqual = -matchqual
bysort obs sasid (neg_matchqual exactyr_match reasonableyr_match): g first = _n==1
drop neg_matchqual
keep if first==1
sort obs sasid

*SRC investigated 9/5/19 these are mostly good but a few ties to break
* an obs with a single candidate sasid is kept; with several candidates only the
* exact year/grade ones survive, and the obs is dropped if more than one remains
	duplicates tag obs  , gen(dup)
replace keepsasid=1 if dup==0
replace keepsasid=1 if exactyr_match==1&keepsasid==.
keep if keepsasid==1
drop dup
duplicates tag obs, gen(dup)
keep if dup==0
*another check
* reset and re-flag: exact year/grade, or off by one year with a perfect name score
replace keepsasid=.
replace keepsasid=1 if exactyr_match==1&keepsasid==.
replace keepsasid=1 if reasonableyr_match==1&keepsasid==.&matchqual==1
* ordering here is for browsing only
gsort -matchqual exactyr_match reasonableyr_match

*hand match here
order keepsasid lastname Ulastname firstname Ufirstname matchqual town_res exactyr_match reasonableyr_match twoyr_match 
*br if keepsasid!=1
replace keepsasid = 1 if keepsasid==.& matchqual>=0.9691 //SRC review 1/25/20 -- these look good
*hand keep the rest that look like good matches
egen handmatch=anymatch(obs), values(7438 5801 2247 51095 48556 7142 46921 ///
14888 15799 57892 54148 47851 45492 40025 39257 51252 14969 13371 35941 20225 13982 ///
20430 19518 7324 47242 19584 19663 7071 8095 49803 21945 5415 )
replace keepsasid=1 if handmatch==1
drop handmatch

save "${save}\allremaining_fuzzy_compact_v2_c_parent3", replace


 * Stack the three match passes onto the full no-SASID list, one row per obs.
 * hyphensmatch / JRmatch / fuzzymatch record the _merge result of each pass.
 use "${save}\allremaining_fuzzy_compact_a_parent3", clear
 merge 1:1 obs using "${save}\allremaining_parent3"
 rename _merge hyphensmatch
 save "${save}\allremaining_Matched_parent3", replace
 
 use "${save}\allremaining_fuzzy_compact_b_parent3", clear
 merge 1:1 obs using "${save}\allremaining_Matched_parent3"
 rename _merge JRmatch
 save "${save}\allremaining_Matched_parent3", replace
 
 use "${save}\allremaining_fuzzy_compact_v2_c_parent3", clear
 drop _merge
 
 merge 1:1 obs using "${save}\allremaining_Matched_parent3"
 rename _merge fuzzymatch
 save "${save}\allremaining_Matched_parent3", replace

 * FIX: the original ran `keep if keepsasid == 1' here. keepsasid exists only on
 * rows that came from the fuzzy file, so that also threw away every obs matched
 * by the hyphen and JR passes (already removed from the fuzzy pool) and every obs
 * with a non-missing statasasid, which made the combo_sasid logic below pointless.
 * Instead, blank the sasid of fuzzy candidates that were not accepted
 * (fuzzymatch != 2 means the row came from the fuzzy file) and let the final
 * `keep if combo_sasid!=.' drop the rows that have no ID at all.
 replace sasid = . if fuzzymatch != 2 & keepsasid != 1
 sort obs
 
 * statasasid holds the exact-name matches; sasid holds the hyphen/JR/accepted fuzzy matches
rename sasid fuzzysasid 
 g double combo_sasid=statasasid //MUST FORMAT AS DOUBLE TO KEEP ALL INFORMATION!!!! 
  format combo_sasid %12.0f
replace combo_sasid=fuzzysasid if combo_sasid==.
keep if combo_sasid!=.
 save "${save}\allremaining_Matched_parent3_mnameTEST", replace
 
}	



if "$cleanup"=="1"{
* Start from the appended applicant file and fill missing SASIDs from the
* matched file (keyed by obs).
use "$save/appended_parent3_mnameTEST_V2.dta", clear
merge 1:1 obs using "${save}\allremaining_Matched_parent3_mnameTEST", keepusing(obs combo_sasid) nogen
replace sasid = combo_sasid if sasid==.
drop combo_sasid


*only complete dups are due to missing sasids, some dup sasids b/c of multiple applications

drop from*

* missing location flags default to 0
foreach flag in notbos noturb {
	replace `flag' = 0 if `flag' == .
}
* every Boston lottery is also an urban lottery
replace urban_lottery=1 if boston_lottery==1
	*not urban are: Sturgis, FourRiv, Parker, Marble, Innov, PVPA, CapeCod


***a couple of kids apply for the wrong grade level (both HS and MS)
* Hand-identified applications are flagged disqualified = 1. Records are
* grouped by grade/school/year; a few are listed under more than one rule,
* exactly as in the original list.

* any grade-6 application from these students
replace disqualified=1 if inlist(sasid, 1051329713, 1051944514, 1076561606, 1086720716, 1004671608) & grade==6

*D.SUN 04-01-2013: 
*the judgment call here on which to be dropped is based on year-of-birth from SIMS
*check "LTO lotto app year duplicates correction" excel for details

* BosPrep grade 6, by application year
replace disqualified=1 if school=="BosPrep" & grade==6 & year==2005 & sasid==1055848512
replace disqualified=1 if school=="BosPrep" & grade==6 & year==2006 & sasid==1067007511
*replace disqualified=1 if sasid==1090912207&school=="BosPrep"&year==2006&grade==6
* (1035417419 and 1093516515: "should be boscol =1 in the end" per the original note)
replace disqualified=1 if school=="BosPrep" & grade==6 & year==2007 & inlist(sasid, 1004671608, 1035417419, 1076561606, 1093516515, 1051615711)
replace disqualified=1 if school=="BosPrep" & grade==6 & year==2008 & sasid==1056919512
replace disqualified=1 if school=="BosPrep" & grade==6 & year==2009 & sasid==1016612314

* other school-specific cases
replace disqualified=1 if sasid==1016636017 & school=="MATCH_HS" & year==2005 & grade==9
replace disqualified=1 if sasid==1091949514 & school=="APR" & year==2008 & grade==5
replace disqualified=1 if sasid==1091949514 & school=="COAH" & year==2008 & grade==9
replace disqualified=1 if sasid==1059519511 & school=="UPAcademy" & year==2011 & grade==6

* grade 9
replace disqualified=1 if sasid==1004943511 & year==2011 & grade==9
replace disqualified=1 if inlist(sasid, 1018536322, 1048628722) & year==2013 & grade==9

* grade 0
replace disqualified=1 if sasid==1037722613 & year==2007 & grade==0
replace disqualified=1 if inlist(sasid, 1010215528, 1016907625, 1035994217, 1052290618) & year==2012 & grade==0
replace disqualified=1 if inlist(sasid, 1058098410, 1083591011, 1018639620) & year==2013 & grade==0

* grades 1 and 2
replace disqualified=1 if sasid==1025329228 & year==2011 & grade==1
replace disqualified=1 if sasid==1025813024 & year==2012 & grade==1
replace disqualified=1 if sasid==1065011624 & year==2011 & grade==2

* One kid applied to different schools/grades in the same year - disqualify from the one that is the wrong grade * new as of 3/22/2015
replace disqualified=1 if sasid==1013995811 & year==2012 & grade==4

	***a couple of kids apply for the wrong grade level (both HS and MS/ES)
	* hs and es are kept in the data; x is a scratch share used only for the
	* commented-out browse and is dropped again
	gen hs = (school=="MATCH_HS"|school=="COAH"|school=="COAHII"|school=="BGA"|(school=="Codman"&grade==9)|(school=="BosPrep"&grade==9))  /* need to look into 1001256211*/
	bysort sasid year: egen x = mean(hs)
	// Browse if sasid is not always middle school and not always hs 
	sort sasid
	*br if x ~= 0 & x ~= 1 & sasid ~=.
	drop x  
	
	* do the same for ES and MS
	gen es = (grade>=-1 & grade<=4)
		bysort sasid year: egen x = mean(es)
		sort sasid
		drop x

*DS 8/16/2013: detect late-applicants/siblings and flag them out -- we do not want them to be in the applicant pool !
* a SASID coded on the basis of town is treated as unmatched
replace sasid=. if basedon==1 /* basedon==1 when decision to code sasid was based on town */
gen dup=1 if cantmatch=="dup" | duplicate==1 //| duplicates==1
cap drop cantmatch
gen unmatched = (sasid==.)
* y = 1 when any exclusion reason applies
gen y = inlist(1, disqualified, lateapplicant, outofarea, sibling, unmatched, dup)
save "$data_setup/for_demand_analysis_parent3.dta", replace


*Generate application variables
* y flags the applicants we drop (sib, late, disq, outofarea, unmatched, dup). We don't give them an apply coding.
*CW 1 27 2012:  ADD APPLY SALEM ACADEMY
* apply<stem> = 1 when school equals the matching code and y != 1, else missing.
* The two lists are parallel: the i-th stem pairs with the i-th school code.
* Uncommon and DP both key off "UncommonSchools".
local stems APR BosCol BGA BosPrep CoaH CoaHII Codman DCA EdBrooke EdBrooke2 EdBrooke3 ///
	Excel Excel3 GroveH KippBos MATCH_HS MATCH_MS RoxPrep Uncommon UP DP ///
	MATCH_ES BridgeB Conserv NHCS ///
	CapeCod FourRiv Global Innov Marble PVPA Parker SalemAc Sturgis KIPPLynn RisingTide
local codes APR BosCol BGA BosPrep COAH COAHII Codman DCA EdBrooke EdBrooke2 EdBrooke3 ///
	Excel Excel3 GroveH KIPP_BOS MATCH_HS MATCH_MS RoxPrep UncommonSchools UPAcademy UncommonSchools ///
	MATCH_ES BridgeB Conserv NHCS ///
	CapeCod FourRiv Global Innovation Marblehead PVPA Parker SalemAc Sturgis KIPP_Lynn RisingTide

local nschools : word count `stems'
forvalues k = 1/`nschools' {
	local stem : word `k' of `stems'
	local code : word `k' of `codes'
	gen apply`stem' = 1 if school=="`code'" & y!=1
}

* an UncommonSchools application also counts for RoxPrep and GroveH
replace applyRoxPrep=1 if school=="UncommonSchools" & y!=1
replace applyGroveH=1 if school=="UncommonSchools" & y!=1


*CW 1 27 2012:  UPDATE SCHOOL NAMES, INCL SALEM ACADEMY
* School-specific offer variables. DP (never had its own lottery), Uncommon
* and prioritygroup are deliberately left out of the list.
local lotteries APR BosCol BGA BosPrep CoaH CoaHII Codman DCA ///
	EdBrooke EdBrooke2 EdBrooke3 Excel Excel3 GroveH KippBos MATCH_HS ///
	MATCH_MS RoxPrep UP MATCH_ES BridgeB Conserv NHCS ///
	CapeCod FourRiv Global Innov Marble PVPA Parker SalemAc Sturgis KIPPLynn RisingTide

foreach l of local lotteries {
	*individual offer variables
	* the general offer is copied only where apply`l' == 1; everything else
	* (siblings, disqualified, other schools, missing offer) becomes 0
	gen offer`l' = cond(apply`l'==1 & offer!=., offer, 0)
	gen initial_offer`l' = cond(apply`l'==1 & initial_offer!=., initial_offer, 0)
}

* KP 7/14/20 - omitted
/* substitute the better data for UCS
foreach l in RoxPrep GroveH {
	foreach offtype in offer initial_offer {
		replace `offtype'`l' = `offtype'`l'_corr if `offtype'`l'_corr!=.
		replace `offtype'`l' = 0 if `offtype'`l'==.
	}
}	
	drop *_corr*/
	
/* Clean the first, second, and third choice schools of UCS
ren stschool ucs_firstchoice
ren ndschool ucs_secondchoice
ren rdschool ucs_thirdchoice
	gen ucs_first_DP = .	
	gen ucs_first_GH = .
	gen ucs_first_RP = .
		replace ucs_first_DP = 1 if inlist(ucs_firstchoice, "DORCHEST", "DORCHESTE","DORCHESTER","DORCHESTER PREP","DORCHESTERT PREP","DPCHS","Dorchester","Dorchester ")
		replace ucs_first_GH = 1 if inlist(ucs_firstchoice,"GORVE HALL","GROVE  HALL PREP","GROVE HALL","GROVE HALL PREP","Lucy Stone")
		replace ucs_first_RP = 1 if inlist(ucs_firstchoice,"Mission Hill","ROSBURY","ROSBURY PREP","ROXBUERY","ROXBURG","ROXBURY")
			replace ucs_first_RP = 1 if inlist(ucs_firstchoice,"ROXBURY CHARTER","ROXBURY PERP","ROXBURY PRE","ROXBURY PREP","ROXBURY PREPATORY") */
			
	//drop ucs_firstchoice ucs_secondchoice ucs_thirdchoice		 
			
/* Want to drop from dataset 
	- people who just applied to lotteries that were not oversubscribed
	- people who applied to not oversubscribed lotteries and only got an offer at the not oversubscribed
		lottery should have their "offer" and "initial offer" turned to 0
		
		NEED TO FINISH THIS SO THAT offer and initial_offer (the general variables, not school specific) are correct
	EMS 8-30-2013 
	
	*** 3/20/2015: EMS- Since now I am using initial and ever offers as separate instruments, I can keep the non-oversub-
	scribed lotteries' initial offers
	*/
// Not oversubscribed: EdBrookeI 2006; COAH 2002; COAH 2004; Codman 2004; Bos Prep 2005; COAH 2008; BGA 2012; BGA 2014; COAH2013; UP 2014
// No ever offer:
// No initial offer: EdBrookeI 2012, Conservatory 2010
	
	* Ever-offers from lotteries that were not oversubscribed are set to 0.
	* The matching initial-offer lines stay commented out (changed 3/20/2015).
	replace offerEdBrooke=0 if year==2006
	*replace initial_offerEdBrooke=0 if year==2006 // older code used to have this turned on, changed 3/20/2015
	
	replace offerCoaH=0 if year==2002
	*replace initial_offerCoaH=0 if year==2002

	replace offerCoaH=0 if year==2004
	*replace initial_offerCoaH=0 if year==2004

	replace offerCoaH=0 if year==2008
	*replace initial_offerCoaH=0 if year==2008

	replace offerCodman=0 if year==2004
	*replace initial_offerCodman=0 if year==2004

	replace offerBosPrep=0 if year==2005
	*replace initial_offerBosPrep=0 if year==2005

	replace offerBGA=0 if year==2012
	*replace initial_offerBGA=0 if year==2012

	* FIX: this line repeated year==2012, so BGA 2014 (listed above as not
	* oversubscribed) was never zeroed.
	replace offerBGA=0 if year==2014
	*replace initial_offerBGA=0 if year==2014
	
	replace offerCoaH=0 if year==2013
	
	replace offerUP=0 if year==2014
	
	* Lotteries with no usable initial offer
	replace initial_offerEdBrooke=0 if year==2012
	
	* NOTE(review): the list above says Conservatory 2010 but this line uses 2012,
	* the same year as the EdBrooke line before it - confirm which year is right.
	replace initial_offerConserv=0 if year==2012
	
/*
foreach v of varlist risk_*{
	bys sasid year: egen max`v'=max(`v') 
	replace max`v'=. if sasid==.
	replace `v'=1 if max`v'!=. & max`v'!=0
	drop max`v'
	replace `v'=0 if `v'
} */
* Make the school-specific apply / offer / initial_offer indicators agree across all
* records of the same sasid-year. Records with a missing sasid are never updated from
* the group maximum.
foreach sch in APR BosCol BosPrep CoaH Codman MATCH_MS RoxPrep MATCH_ES {
	tempvar grp_apply grp_offer grp_initial

	* apply: 1 on every record when any record of the sasid-year applied, otherwise 0
	bysort sasid year: egen `grp_apply' = max(apply`sch')
	replace apply`sch' = 1 if sasid!=. & `grp_apply'!=. & `grp_apply'!=0
	replace apply`sch' = 0 if apply`sch'==.

	* ever offer: set to 1 wherever the group maximum is non-missing, then blanked
	* again where that maximum is 0 (so a group without any offer ends up missing)
	bysort sasid year: egen `grp_offer' = max(offer`sch')
	replace offer`sch' = 1 if sasid!=. & `grp_offer'!=.
	replace offer`sch' = . if sasid!=. & `grp_offer'==0

	* initial offer: 1 when the group maximum is 1, missing when it is 0
	bysort sasid year: egen `grp_initial' = max(initial_offer`sch')
	replace initial_offer`sch' = 1 if sasid!=. & `grp_initial'==1
	replace initial_offer`sch' = . if sasid!=. & `grp_initial'==0

	drop `grp_apply' `grp_offer' `grp_initial'
}

/* KP 7/14/20 omitted
* make UCS first choice data consistent across sasid
	foreach sch in RP DP GH {
		bys sasid year: egen max_ucs_first_`sch'=max(ucs_first_`sch')
		replace ucs_first_`sch' = max_ucs_first_`sch'
		drop max_ucs_first_`sch'
	}	
*/

* Spread every indicator listed in the shortnames local across all records of the
* same sasid-year: 1 if any record has a non-zero value, otherwise 0.
* Records with a missing sasid only get their own missing values set to 0.
foreach ind of local shortnames {
	tempvar grp_any
	bysort sasid year: egen `grp_any' = max(`ind')
	replace `ind' = 1 if sasid!=. & `grp_any'!=. & `grp_any'!=0
	replace `ind' = 0 if `ind'==.
	drop `grp_any'
}

* Grade harmonization within sasid-year.
* To check that students have not applied to grade combinations that make no sense:
* drop if y==1 here and then inspect the mingrade x maxgrade table below.

* lowest and highest grade applied to, over records with y!=1 only (restriction added 3/20/2015)
bysort sasid year: egen mingrade = min(grade) if y!=1
by sasid year: egen maxgrade = max(grade) if y!=1
tabulate mingrade maxgrade

* Students who apply to two adjacent grades are pooled into one combined grade code so
* that they land in the same risk set:
*   -1 and 0  -> -0.5   (young grades, where age is more flexible)
*    0 and 1  ->  0.5
*    5 and 6  ->  56
replace grade = -0.5 if mingrade==-1 & maxgrade==0
replace grade = 0.5  if mingrade==0  & maxgrade==1
replace grade = 56   if mingrade==5  & maxgrade==6

* one year of sturgis has grade at time of app, not grade applying for
replace grade = 9 if grade==8

drop mingrade maxgrade

* Do 9th and other grades separately.
* The files saved from here on are used later by the lottery audit file to create the
* Sample Restrictions, SIMS Match and Outcome Samples tables.

	* Students who applied in more than one year (e.g. elementary lotteries, then MS):
	* only flag them for now.
	bysort sasid: egen minyear = min(year)
	tabulate year minyear

	* record-level flag for any application after the first year, then student-level
	generate mult_lottoflag = 1 if year!=minyear
	bysort sasid: egen mult_lottoflag2 = max(mult_lottoflag)

	* same, restricted to records with grade 4 or below (to investigate the grades)
	generate mult_lottoflag_ES = 1 if mult_lottoflag2==1 & grade<=4
	bysort sasid: egen mult_lottoflag_ES2 = max(mult_lottoflag_ES)

	* grade range over all of the student's applications
	bysort sasid: egen mingrade = min(grade)
	bysort sasid: egen maxgrade = max(grade)
	tabulate mingrade maxgrade

	* firstapp = 1 on records from the student's earliest application year
	generate firstapp = (year==minyear)
	drop minyear maxgrade
	
	* --- middle school audit file: applications to grade 5, 6, 5/6 combined (56) or 7 ---
	* before DCA was counted as middle school, now count it as elementary school for
	* serving those younger than 5th grade 3/22/2015
	preserve
		keep if inlist(grade, 5, 6, 56, 7)
		generate middle = 1
		generate elem = 0
		generate highplus = 0
		save "middle_audit_parent3", replace
	restore

	* --- elementary audit file: applications to grade 4 and below ---
	* keep if firstapp==1 - need to deal with this later
	preserve
		keep if grade<=4
		generate elem = 1
		generate middle = 0
		generate highplus = 0
		save "elem_audit_parent3", replace
	restore

* --- highplus audit file: applications to grade 5 and above ---
keep if grade>=5

* 10.26.2010 SRC add middle grades to HS where appropriate.
* flag marks the highplus schools, i.e. schools that are middle and high school and
* whose cohorts are old enough to be in high school as of the HS class of 2016.
generate flag = 0
	* 6th grade entry: APR (now), BosPrep, MATCH_MS (was)
	replace flag = 1 if inlist(school, "APR", "BosPrep", "MATCH_MS") & year<=2010
	* 5th grade entry: Bos Col
	replace flag = 1 if school=="BosCol" & year<=2009
	* 7th grade entry: Four Rivers, Parker
	replace flag = 1 if inlist(school, "FourRiv", "Parker") & year<=2011

* We used to only keep the high schools and the middleplus schools for this dataset
* at this point (keep if grade==9|flag==1, then drop flag). That restriction now waits
* until later in the program because only the first lottery application may be used.

generate highplus = 1
generate middle = 0
generate elem = 0
save "highplus_audit_parent3", replace

* For each grade band (elem, middle, highplus):
*   1. load the <f>_audit_parent3 file saved above,
*   2. apply the sample restrictions and save the record-level (long) file,
*   3. drop lottery-record variables and collapse to distinct rows,
*   4. build sample-specific offer variables and spread offers within sasid-year,
*   5. rename year/grade, compute the projected graduation year,
*   6. keep only the variables needed for the parent files and save the wide file twice
*      (in the current directory and in $data_setup).
foreach f in elem middle highplus {
	use "`f'_audit_parent3", clear
	
	*keep only first year in lotto for middle and high, don't want to change the MS and HS sample by including ES in this requirement yet
	*most of those dumped are those who are applying for 6th grade the year after applying for 5th grade
	
	* Drop if the student applies for multiple grades in middle school apps
	cap gen repeat_applicants= (firstapp==0 & grade>=5)
	cap drop if repeat_applicants==1 // if later you want to have LTO sample be different, then look at this - then you might want 
		* to change the risk sets and offers to just be for the high school applications
	
	* flag is only generated on the highplus path, so in the elem and middle files this
	* command errors and cap swallows the error, leaving those files unchanged
	cap keep if grade==9|flag==1 /* just keep the middleplus and high school students that had their first charter be a middleplus or highschool */
			/* Note: this command does NOT affect the middle school applicant dataset at all or elementary */
	* sample restrictions on the lottery records
	drop if disqualified==1
	drop if lateapplicant==1
	drop if outofarea==1
	drop if sibling==1
	drop if unmatched==1
	drop if dup==1 // added by ES 3/20/15
	
	* for ES, drop if the student applies in multiple years in ES
	drop if firstapp==0 & grade<=4
	
	* the restriction variables are no longer needed once applied
	drop disqualified lateapplicant outofarea sibling unmatched sib* y basedon 
	cap drop flag 
	
	* record-level file: still one row per lottery record
	save "`f'_applicants_long_parent3", replace

	*reshape
	* dropping the record-specific variables lets duplicates drop collapse rows that
	* are otherwise identical
	drop school dateoflottery /*dob*/ //Elizabeth Added dropping date of lottery; KP put dob back in
	drop lotterynumber waitlistnumber // EMS added 8-19-2013 - don't need this for analysis
	duplicates drop
		count
	
	*risk sets
	*These get redone later based on sample
	
************************************************************	
/* KP omitted 7/14/20
	* UCS instruments
	gen instru_initial_RoxPrep = initial_offerRoxPrep * ucs_first_RP
	gen instru_initial_GroveH = initial_offerGroveH * ucs_first_GH
	gen instru_initial_DP = initial_offerDP * ucs_first_DP

	gen instru_offer_RoxPrep = offerRoxPrep * ucs_first_RP
	gen instru_offer_GroveH = offerGroveH * ucs_first_GH
	gen instru_offer_DP = offerDP * ucs_first_DP
	
	foreach off in initial offer {
		foreach sch in RoxPrep GroveH DP {
			replace instru_`off'_`sch'= 0 if instru_`off'_`sch'==.
		}
	}	
*/
************************************************************

*Add other samples here
	* sample-specific offers: 0 wherever the general offer is non-missing, 1 when the
	* record belongs to the sample (x==1) and has the offer
	foreach x in urban noturban boston notboston {
		g offer_`x'=0 if offer~=.
		g initial_offer_`x'=0 if initial_offer~=.
		replace offer_`x'=1 if `x'==1& offer==1
		replace initial_offer_`x'=1 if `x'==1& initial_offer==1
		}

	
	*offer and initial offer are 1 if you have at ANY charter
	* NOTE(review): unlike the sasid-year loops earlier in this file there is no guard for
	* a missing sasid here, so records with a missing sasid in the same year would be
	* pooled into one group. The drop if unmatched==1 above may already rule this out - confirm.
	foreach v of varlist offer initial_offer *offer_boston *offer_notboston *offer_urban *offer_noturban *_lottery {
		bys sasid year: egen max=max(`v') 
		replace `v'=1 if max==1
		drop max
		replace `v'=0 if `v'==.
	}	
	
	
	ren year yearapp
	label var yearapp "Year of Lottery Application"
	ren grade gradeapp
	label var gradeapp "Grade of Lottery Application"
	label var initial_offer "Initial offer at ANY charter school"
	label var offer "Ever offer at ANY charter school"
	label var boston_lottery "Applied to Boston charter(s)"
	label var notboston_lottery "Applied to non-Boston charter(s)" 
	
	* the combined 5th/6th grade code 56 is mapped back to grade 5 before projecting
	* NOTE(review): the combined early-grade codes -0.5 and 0.5 assigned earlier in this
	* file are not mapped back, so proj_year12 is fractional for those applicants - confirm intended.
	replace gradeapp=5 if gradeapp==56	
	g proj_year12=yearapp+13-gradeapp
	label var proj_year12 "Projected HS Graduation Year"
	drop caplast capfirst
	
	
	*KEEP ONLY NEEDED VARIABLES FOR PARENT FILES
	keep sasid birthdate yearapp gradeapp *lottery apply* *offer* proj_year12 elem highplus middle parent1firstname parent1lastname parent2firstname parent2lastname streetaddress townofresidence 
	* the *lottery pattern above also keeps the variable lottery, which is not wanted
	drop lottery
	
	* same wide file saved twice: current directory and $data_setup
	save "`f'_parent3", replace
	save "$data_setup/`f'_applicants_wide_parent3.dta", replace
	}
	* Individual grade file adjustments, then stack the three grade-band files.
	* From the highplus file only 9th grade applications are kept (added by ems).
use "$data_setup/highplus_applicants_wide_parent3.dta", clear
keep if gradeapp==9
	*ren d_* hs_d_*
append using "$data_setup/middle_applicants_wide_parent3.dta"
	*ren d_* ms_d_*
append using "$data_setup/elem_applicants_wide_parent3.dta"
	*ren d_* es_d_*

* A student who appears in several bands gets each band indicator set to the
* maximum over all of his or her rows; missing indicators count as 0.
foreach band of varlist highplus middle elem {
	tempvar band_any
	replace `band' = 0 if `band'==.
	bysort sasid: egen `band_any' = max(`band')
	replace `band' = `band_any'
	drop `band_any'
}

* Make the offer variables mutually exclusive so that the first stage is easier to
* interpret: each ever-offer variable is zeroed where the matching initial offer is 1
* and is then renamed to waitlist_<name>.
foreach off of varlist offer offer_urban offer_noturban offer_boston offer_notboston {
	replace `off' = 0 if initial_`off'==1
	rename `off' waitlist_`off'
}
	
	* PARENT NAME CLEANUP - KP added 7/14/20
	* upper case, no leading/trailing blanks, internal runs of blanks collapsed
foreach nm of varlist parent1firstname parent1lastname parent2firstname parent2lastname {
	replace `nm' = upper(trim(itrim(`nm')))
}

/*order sasid yearapp gradeapp proj_year12 initial_offer_* waitlist_offer* ///
	apply* applyprioritygroup risk_* */

* Remove exact duplicate rows. These are folks who are in the file twice because they
* are both ms and HS applicants. The drop is issued before and after the format change,
* as in the earlier version of this section.
duplicates drop
format sasid %12.0f
duplicates drop

* report how many sasids still occur on more than one row
duplicates report sasid


save "$data_setup\all_applicants_wide_parent_PARENTSONLY_V3.dta", replace

}

** SAVING A SMALLER VERSION **
* Reload the full parent file and cut it down to the requested identifying fields.
use "$data_setup\all_applicants_wide_parent_PARENTSONLY_V3.dta", clear

*keep only requested info
keep yearapp sasid birthdate streetaddress parent1firstname parent1lastname parent2firstname parent2lastname townofresidence gradeapp apply*

* Normalize case and outer whitespace FIRST, so that the placeholder checks below
* compare like with like.
foreach v of varlist parent1firstname parent1lastname parent2firstname parent2lastname townofresidence streetaddress {
	replace `v' = trim(upper(`v'))
}

*one school is mixed up and we can't use it (don't have orig data)
replace townofresidence = "" if townofresidence=="MA"

* Blank out placeholder last names.
* FIX: the parent names are already upper case when the V3 file is saved, so the old
* comparisons with "(Foster" and "(guardian)" could never match and these placeholders
* survived. Compare against the upper-case forms instead.
foreach v of varlist parent1lastname parent2lastname {
	replace `v' = "" if inlist(`v', "?", "(FOSTER", "(GUARDIAN)")
}
	
* Standardize streetaddress: upper case, single-spaced, no commas, street-type words
* shortened to their abbreviations, no periods after those abbreviations.
*
* Changes relative to the earlier two-pass version of this section:
*  - FIX: PARK was first shortened to PK and later every PK substring was expanded back
*    to PARK. That second step also hit PKWY (giving PARKWY) and names such as HOPKINS
*    (giving HOPARKINS). PARK is now left alone and only a stand-alone PK token is
*    expanded to PARK.
*  - FIX: the "APT " insertions could leave double blanks because whitespace was only
*    collapsed before them; whitespace is now collapsed again at the end.
*  - Periods are stripped after the spelled-out words are shortened, so e.g. LANE. also
*    ends up as LN, and APT. #3 ends up as APT 3.
*  - The duplicated replacements of the two passes are merged into one.
* NOTE(review): apart from PK, all replacements are still plain substring replacements,
* so e.g. the ROAD inside BROADWAY is shortened as well. Left unchanged here because
* switching to whole-word matching would alter many stored addresses - decide separately.

* 1. case, blanks, commas
replace streetaddress = upper(trim(itrim(streetaddress)))
replace streetaddress = itrim(subinstr(streetaddress, ",", "", .))

* 2. spelled-out street types -> abbreviations (the two lists are parallel)
local addr_words LANE CIRCLE PARKWAY PLACE COURT STREET BOULEVARD AVENUE DRIVE TERRACE ROAD
local addr_abbrs LN CIR PKWY PL CT ST BLVD AVE DR TER RD
local addr_n : word count `addr_words'
forvalues k = 1/`addr_n' {
	local addr_from : word `k' of `addr_words'
	local addr_to : word `k' of `addr_abbrs'
	replace streetaddress = subinstr(streetaddress, "`addr_from'", "`addr_to'", .)
}

* 3. drop the period after an abbreviation
foreach addr_ab in LN CIR PKWY PL PK CT ST BLVD AVE DR RD APT {
	replace streetaddress = subinstr(streetaddress, "`addr_ab'.", "`addr_ab'", .)
}
replace streetaddress = subinstr(streetaddress, "TERR.", "TER", .)
replace streetaddress = subinstr(streetaddress, "P.O.", "PO", .)

* 4. unit designators: APARTMENT and # both become APT, a doubled APT is reduced to one
replace streetaddress = subinstr(streetaddress, "APARTMENT", "APT ", .)
replace streetaddress = subinstr(streetaddress, "#", "APT ", .)
replace streetaddress = itrim(streetaddress)
replace streetaddress = subinstr(streetaddress, "APT APT", "APT", .)

* 5. a stand-alone PK token becomes PARK; padding with blanks makes the first and the
*    last token match too
replace streetaddress = subinstr(" " + streetaddress + " ", " PK ", " PARK ", .)

* 6. final whitespace cleanup (also removes the padding added in step 5)
replace streetaddress = trim(itrim(streetaddress))
	
	* parentflag = 1 when at least one parent last name is non-empty
	generate parentflag = !(parent1lastname=="" & parent2lastname=="")
	* addressflag = 1 when the street address is non-empty
	generate addressflag = (streetaddress!="")


save "$data_setup\all_applicants_wide_parent_PARENTSONLY_V4_small.dta", replace

